Analysis date: 2021-03-30
library(plyr)
library(gtools)
library(pheatmap)
library(Matrix)
library(Hmisc)
library(ggpubr)
library(ggrepel)
library(DESeq2)
library(tidyverse)
library(vsn)
library(fdrtool)
library(limma)
library(apeglm)
library(MultiAssayExperiment)
library(gplots)
library(matrixStats)
library(DEqMS)
source("data/Figure_layouts.R")
load("data/CLL_Proteomics_Setup.RData")
load("data/CLL_Proteomics_ConsensusClustering.RData")
colData(multiomics_MAE)$PG <- as.factor(CCP_group5[rownames(colData(multiomics_MAE))])
colData(multiomics_MAE)$CCP6_RNA <- as.factor(CCP_group6_RNA[rownames(colData(multiomics_MAE))])
Calculate_Limma <- function(assay_data, mut){
# prepare assay data
assay_data[is.infinite((assay_data))] <- NA
is.na(assay_data) <- NA
stopifnot(mut %in% colnames(proteomics_tbl_meta_biomart))
# prepare row data
row_data <- left_join((rownames(assay_data) %>% enframe(value = "rowname")),
(proteomics_tbl_meta_biomart %>%
dplyr::select(rowname, start_position, end_position, chromosome_name, mean_position) %>% unique %>%
group_by(rowname) %>%
dplyr::slice(1) %>% ungroup() ),
by=c("rowname")) %>% dplyr::select(-name) %>% as.data.frame()
rownames(row_data) <- row_data$rowname
stopifnot(all(rownames(assay_data)==rownames(row_data) ))
# prepare column data
col_data <- left_join((colnames(assay_data) %>% enframe(value = "colname")),
(proteomics_tbl_meta_biomart %>% dplyr::select(colname, mut) %>% unique() ),
by=c("colname") ) %>% dplyr::select(-name) %>% as.data.frame()
rownames(col_data) <- col_data$colname
stopifnot(all(colnames(assay_data)==rownames(col_data) ))
if( (length(unique(col_data[,mut])) ==2) & !is.logical(col_data[,mut]) & is.numeric(col_data[,mut]) ){
col_data[,mut] <- as.logical(col_data[,mut] == max(col_data[,mut]) )
}
#Creating an expression set
raw_dataE <- ExpressionSet(assayData = assay_data,
phenoData = AnnotatedDataFrame(col_data),
featureData = AnnotatedDataFrame(row_data))
validObject(raw_dataE)
if(!is.logical(pData(raw_dataE)[,colnames(pData(raw_dataE))==mut]))stop("The mutation you want to look at is not logical")
raw_dataE
# Limma
limma_data <- raw_dataE
comparison <- c("mut - wt")
colnames(pData(limma_data))[colnames(pData(limma_data)) == mut] <- "condition"
pData(limma_data) <- mutate(pData(limma_data),condition= if_else(condition, "mut","wt") )
limma_data <- limma_data[, !is.na(pData(limma_data)$cond)]
limma.cond <- factor(pData(limma_data)$condition, ordered = FALSE)
contrast.matrix <- model.matrix( ~ 0 + limma.cond)
colnames(contrast.matrix) <- gsub("limma.cond", "", colnames(contrast.matrix))
limma.object <- eBayes(
contrasts.fit(
lmFit(limma_data, design = contrast.matrix),
makeContrasts(contrasts = comparison, levels = contrast.matrix)
)
)
##### DEQMS
tmp <- metadata(multiomics_MAE)$protein_description %>% dplyr::select(`Gene Name`, MinPSMQuant) %>%
mutate(MinPSMQuant= as.numeric(MinPSMQuant))
psm.count.table <- tmp[,2] %>% as.data.frame()
rownames(psm.count.table) <- tmp$`Gene Name`
limma.object$count = psm.count.table[rownames(limma.object$coefficients),"MinPSMQuant"]
limma_DEQMS = spectraCounteBayes(limma.object)
DEqMS.results = outputResult(limma_DEQMS,coef_col = 1)
limma_results_i <- DEqMS.results
limma_results_i <- subset(limma_results_i, !is.na(logFC))
limma_results_i$comparison <- comparison
limma_results_i$mut <- mut
colnames(limma_results_i)[c(8,9,10, 14, 15, 16)] <-
c("limma.t_beforeDEQMS", "pvalue.limma_beforeDEQMS" , "fdr.limma_beforeDEQMS", "t", "pvalue.limma", "fdr.limma")
limma_results_i$fdr <- limma_results_i$fdr.limma
limma_results_i$pvalue <- limma_results_i$pvalue.limma
return(limma_results_i)
}
Annotate_Limma_Results <- function(limma_results){
fdr_hit_threshold <- 0.001
fdr_candidate_threshold = 0.01
fc_hit_threshold <- 0.5
fc_candidate_threshold <- 0.3
limma_results$hit <-
with(limma_results, ifelse(fdr <= fdr_hit_threshold & abs(logFC) >= fc_hit_threshold, TRUE, FALSE))
limma_results$hit_annotation <- with(limma_results,
ifelse(fdr <= fdr_hit_threshold & abs(logFC) >= fc_hit_threshold,
"hit",
ifelse(fdr <= fdr_candidate_threshold & abs(logFC) >= fc_candidate_threshold,
"candidate", "no hit")))
limma_results$hit_annotation <- factor(limma_results$hit_annotation, ordered = TRUE, levels = c("hit", "candidate", "no hit"))
return(limma_results)
}
change_chr_abber_brackets <- function(levels_alt){
levels_alt %>%
str_replace(., "del", "del(") %>% str_replace(., "gain", "gain(") %>%
str_replace(., "q", ")(q") %>% str_replace(., "p", ")(p") %>%
str_replace(., "11\\)\\(q", "11)(q22.3)") %>%
str_replace(., "p13", "p13)") %>% str_replace(., "q14", "q14)") %>%
str_replace(., "q24", "q24)")
}
limma_results <- NULL
mut_to_limma <- colnames(proteomics_tbl_meta_biomart)[-(1:12)][c(-15)]
for(i in 1:length(mut_to_limma)){
m <- mut_to_limma[i]
print(m)
limma_results <- bind_rows(limma_results,
Calculate_Limma(assay_data = assay(multiomics_MAE[prot_few_nas ,pat_overlap_prot_RNA ,"proteomics"]), mut = m))
}
## [1] "chrom_abber_del11q"
## [1] "chrom_abber_del13q14"
## [1] "chrom_abber_del17p13"
## [1] "chrom_abber_gain8q24"
## [1] "chrom_abber_trisomy12"
## [1] "SNPs_ATM"
## [1] "SNPs_BIRC3"
## [1] "SNPs_EGR2"
## [1] "SNPs_NOTCH1"
## [1] "SNPs_POT1"
## [1] "SNPs_SF3B1"
## [1] "SNPs_TP53"
## [1] "SNPs_XPO1"
## [1] "health_record_bin_IGHV_mutated"
## [1] "health_record_bin_treated"
limma_results$mut %>% unique
## [1] "chrom_abber_del11q" "chrom_abber_del13q14"
## [3] "chrom_abber_del17p13" "chrom_abber_gain8q24"
## [5] "chrom_abber_trisomy12" "SNPs_ATM"
## [7] "SNPs_BIRC3" "SNPs_EGR2"
## [9] "SNPs_NOTCH1" "SNPs_POT1"
## [11] "SNPs_SF3B1" "SNPs_TP53"
## [13] "SNPs_XPO1" "health_record_bin_IGHV_mutated"
## [15] "health_record_bin_treated"
ggplot(data = limma_results) +
geom_histogram(aes(pvalue.limma, alpha = 0.5), bins = 40) +
guides(alpha = FALSE) +
xlab("p-value") +
facet_wrap( ~ mut +comparison, scale = "free_y") +
coord_cartesian(xlim = c(0, 1)) +
pp_sra + ggtitle("p-value histograms")
limma_results <- Annotate_Limma_Results(limma_results)
with(limma_results, table(mut, hit_annotation))
## hit_annotation
## mut hit candidate no hit
## chrom_abber_del11q 0 6 7305
## chrom_abber_del13q14 0 1 7310
## chrom_abber_del17p13 0 10 7301
## chrom_abber_gain8q24 1 0 7310
## chrom_abber_trisomy12 67 221 7023
## health_record_bin_IGHV_mutated 16 73 7222
## health_record_bin_treated 2 36 7273
## SNPs_ATM 0 0 7311
## SNPs_BIRC3 0 4 7307
## SNPs_EGR2 4 10 7297
## SNPs_NOTCH1 0 0 7311
## SNPs_POT1 0 0 7311
## SNPs_SF3B1 29 53 7229
## SNPs_TP53 2 17 7292
## SNPs_XPO1 0 2 7309
message("Number of up and downregulated hits")
## Number of up and downregulated hits
with(limma_results %>% filter(hit==TRUE), table(mut, sign(logFC)))
##
## mut -1 1
## chrom_abber_gain8q24 1 0
## chrom_abber_trisomy12 13 54
## health_record_bin_IGHV_mutated 5 11
## health_record_bin_treated 2 0
## SNPs_EGR2 0 4
## SNPs_SF3B1 26 3
## SNPs_TP53 0 2
dif_proteins_P_plot <- with(limma_results %>% mutate(dir=sign(logFC)), table("mut"=paste0(mut,"dir", dir), hit)) %>% as_tibble() %>%
separate(mut, into = c("mut", "dir"), sep = "dir") %>%
filter(mut!="health_record_bin_treated") %>%
mutate(dir=as.numeric(dir)) %>%
mutate(n=n*dir) %>%
mutate(mut=gsub("chrom_abber", "CNVs", mut),
mut=gsub("health_record_bin", "IGHV", mut),
mut= gsub("SNPs", "SNVs", mut) ) %>%
separate(mut, into = c("Type", "mut"), sep="_", extra = "merge" ) %>%
mutate(mut = change_chr_abber_brackets(mut) ) %>%
filter(hit==TRUE) %>%
arrange(desc(n)) %>%
mutate(mut = as.factor(mut)) %>%
mutate(mut = factor(mut, levels = unique(.$mut))) %>%
mutate(dir = if_else(dir == 1, "up", if_else(dir == (-1), "down", "NA" ))) %>%
ggplot(aes(mut, n)) +
geom_col(aes(fill=as.character(dir))) +
ylab("Nr. differentially abundant proteins") +
facet_grid(~Type, scales = "free_x", space="free") +
theme_bw() +
theme(panel.grid = element_blank(), axis.text.x = element_text(angle = 90), axis.title.x = element_blank(),
strip.background = element_rect(fill = "white"), strip.text = element_text(face = "bold")) +
scale_fill_manual(values = c("#0571b0", "#ca0020"), drop=FALSE) +
geom_hline(yintercept = 0, color="darkgray")
dif_proteins_P_plot + geom_text(aes(mut, y=(-(-15-(sign(n)*45))), label=n ))
dif_proteins_P_FDR5_plot <-
with(limma_results %>%
mutate(dir=sign(logFC),
hit = if_else(fdr <= 0.05 &
abs(logFC) >= 0.5, TRUE,
FALSE )),
table("mut"=paste0(mut,"dir", dir), hit)) %>%
as_tibble() %>%
separate(mut, into = c("mut", "dir"), sep = "dir") %>%
filter(mut!="health_record_bin_treated") %>%
mutate(dir=as.numeric(dir)) %>%
mutate(n=n*dir) %>%
mutate(mut=gsub("chrom_abber", "CNVs", mut),
mut=gsub("health_record_bin", "IGHV", mut),
mut= gsub("SNPs", "SNVs", mut) ) %>%
separate(mut, into = c("Type", "mut"), sep="_", extra = "merge" ) %>%
mutate(mut = change_chr_abber_brackets(mut) ) %>%
filter(hit==TRUE) %>%
arrange(desc(n)) %>%
mutate(mut = as.factor(mut)) %>%
mutate(mut = factor(mut, levels = unique(.$mut))) %>%
mutate(dir = if_else(dir == 1, "up", if_else(dir == (-1), "down", "NA" ))) %>%
ggplot(aes(mut, n)) +
geom_col(aes(fill=dir)) +
ylab("Nr. differentially abundant proteins") +
facet_grid(~Type, scales = "free_x", space="free") +
theme_bw() +
theme(panel.grid = element_blank(), axis.text.x = element_text(angle = 90), axis.title.x = element_blank(),
strip.background = element_rect(fill = "white"), strip.text = element_text(face = "bold")) +
scale_fill_manual(values = c("#0571b0", "#ca0020"), drop=FALSE) +
geom_hline(yintercept = 0, color="darkgray")
limma_protein_all_volcano_plots <-
limma_results %>%
mutate(mut=gsub( "chrom_abber_", "", mut),
mut=gsub( "SNPs_", "", mut),
mut=gsub( "health_record_bin_", "", mut),
FDR = if_else(fdr > 0.5, "no hit",
if_else(fdr < 0.001, "FDR 0.1%", "FDR 5%"))) %>%
mutate(mut = change_chr_abber_brackets(mut) ) %>%
ggplot( aes(logFC, -log10(pvalue), colour = FDR)) +
geom_vline(aes(xintercept = 0)) +
geom_point(alpha=0.5) +
facet_wrap( ~ mut , ncol = 4) +
xlab("log2(fold change)") +
scale_color_manual(values=c( "#ca0020", "#0571b0", "grey40")) +
pp_sra +
theme(legend.position = "top", legend.title = element_blank())
limma_protein_all_volcano_plots +
geom_text(aes(label = rowname),
data = subset( (limma_results %>%
mutate(mut=gsub( "chrom_abber_", "", mut),
mut=gsub( "SNPs_", "", mut),
mut=gsub( "health_record_bin_", "", mut)) %>%
mutate(mut = change_chr_abber_brackets(mut) )),
hit_annotation == "hit"),
vjust = 0, nudge_y = 0.1, size = 3, check_overlap = FALSE,
color= "#ca0020")
tmp <- limma_results %>% filter(hit==TRUE) %>%
mutate(direction=sign(logFC)) %>%
group_by(mut) %>% arrange(fdr) %>%
dplyr::select(rowname, direction)
## Adding missing grouping variables: `mut`
message("Upregulated hits")
## Upregulated hits
sapply(group_nest(tmp)$mut, function(m){
tmp %>% filter(mut==m, direction==1) %>% .$rowname
})
## $chrom_abber_gain8q24
## character(0)
##
## $chrom_abber_trisomy12
## [1] "EEA1" "GOLGA3" "DERA" "PTPN11" "CAMKK2" "TIGAR"
## [7] "ITPRIP" "DENR" "PTPN6" "LDHB" "IRAK4" "HIP1R"
## [13] "PYCARD" "TSFM" "NUDT1" "PRICKLE1" "PNP" "MLX"
## [19] "SAMHD1" "GNB4" "SLC2A6" "SHMT2" "RILPL2" "CORO1C"
## [25] "TWF1" "RHOF" "IKBIP" "SPAG1" "CKLF" "PPP2R5B"
## [31] "NT5E" "BCAT1" "CLIP2" "ITGAL" "UBE2H" "ABCD2"
## [37] "NLRC4" "SCPEP1" "MLXIP" "BCL7A" "TTC39C" "FES"
## [43] "CYFIP1" "CNOT8" "LDB1" "TMEM19" "ITGB2" "DPYSL2"
## [49] "TMEM231" "RAB29" "INF2" "RNF135" "BIN2" "ITGAX"
##
## $health_record_bin_IGHV_mutated
## [1] "FTH1" "FTL" "USP6NL" "ZBTB20" "TBC1D2B" "ABI3" "SLAMF1"
## [8] "FCRL1" "UBE2E2" "FCRL2" "ADD3"
##
## $health_record_bin_treated
## character(0)
##
## $SNPs_EGR2
## [1] "SLC2A1" "ACTN1" "EPPK1" "ZNF446"
##
## $SNPs_SF3B1
## [1] "SUGP1" "ATAD3B" "EYA3"
##
## $SNPs_TP53
## [1] "TP53" "FUT11"
message("Downregulated hits")
## Downregulated hits
sapply(group_nest(tmp)$mut, function(m){
tmp %>% filter(mut==m, direction==-1) %>% .$rowname
})
## $chrom_abber_gain8q24
## [1] "MCPH1"
##
## $chrom_abber_trisomy12
## [1] "PELI1" "TGFBR2" "TCEAL3" "ACYP1" "CRAT" "TCF4"
## [7] "SELENOM" "TLE4" "CAMK2D" "SAMSN1" "TCEAL4" "CTH"
## [13] "SLC25A23"
##
## $health_record_bin_IGHV_mutated
## [1] "CPT1A" "FAM114A1" "ZAP70" "SERPINB6" "SEPT10"
##
## $health_record_bin_treated
## [1] "MTSS1" "ICAM2"
##
## $SNPs_EGR2
## character(0)
##
## $SNPs_SF3B1
## [1] "TPP2" "MAP3K7" "PPP2R5A" "WASHC5" "WASHC4" "VPS8" "IQGAP1"
## [8] "WASHC1" "NAA16" "DPH5" "TTI1" "CCDC88B" "SEPSECS" "SEPT6"
## [15] "TTI2" "WASHC3" "ABCB7" "DDX58" "DIP2A" "IDUA" "RECQL"
## [22] "UQCC1" "JMY" "SUN1" "MROH1" "SEPT2"
##
## $SNPs_TP53
## character(0)
limma_results %>% filter(mut=="chrom_abber_trisomy12") %>%
mutate("Affected_region" = if_else( chromosome_name=="12", "TRUE",
if_else(hit_annotation == "hit", "FALSE", "not altered"))) %>%
mutate("Affected_region"= factor(Affected_region, levels=c("FALSE","TRUE", "not altered"))) %>%
mutate(logFC= if_else(logFC > log2(5), log2(5.2),
if_else(logFC<(-log2(5)), -log2(5.2), logFC) ) ) %>%
ggplot(aes(logFC, -log10(pvalue))) +
geom_vline(aes(xintercept = 0)) +
geom_point(alpha=0.8, aes(colour = Affected_region)) +
geom_text(aes(label = rowname, color=chromosome_name=="12"),
data = subset(limma_results %>% filter(mut=="chrom_abber_trisomy12"), hit_annotation == "hit"),
vjust = 0, nudge_y = 0.1, size = 3, check_overlap = FALSE) +
xlab("log2(fold change)") +
scale_color_manual(values = c("#0571b0", "orange1", "grey"), drop=FALSE) +
pp_sra+
guides(color=FALSE)+
coord_cartesian(xlim=c(-log2(5), log2(5)), ylim=c(0, -log10(min(limma_results$pvalue)))) +
ggtitle("trisomy12")+
theme( plot.title = element_text(size = 20))
tmp <- (limma_results %>%
filter(mut=="chrom_abber_trisomy12", chromosome_name=="12") %>%
.$logFC>0) %>% table %>% as_tibble()
colnames(tmp) <- c("fc", "n")
tmp <- tmp %>% arrange(desc(`fc`))
tmp <- tmp %>% mutate(label=paste( round(n/sum(tmp$n)*100, 1), "%" ), cums =cumsum(tmp$n) )
tmp %>%
mutate("fc"=as.factor(`fc`)) %>%
ggplot(aes(x="", fill=`fc`,y= n)) +
geom_col() +
ggtitle("Proteins on chromosome 12 with positive logFC in trisomy 12") +
coord_polar("y", start=0) +
pp_sra +
theme(axis.title = element_blank(), axis.ticks = element_blank())+
scale_fill_manual(values = c( "grey", "#0571b0"), drop=FALSE)+
annotate(geom = "text", y = tmp$cums-(tmp$n/2), x = 1, label = tmp$label)
tmp <- (limma_results %>%
filter(mut=="chrom_abber_trisomy12", hit==TRUE) %>%
.$chromosome_name == "12") %>%
table %>% as_tibble()
colnames(tmp) <- c("chr12", "n")
tmp <- tmp %>% arrange(desc(`chr12`))
tmp <- tmp %>% mutate(label=paste( round(n/sum(tmp$n)*100, 1), "%" ), cums =cumsum(tmp$n) )
piechart_hits_prot_chr_tris12 <- tmp %>%
mutate("chr12"=as.factor(`chr12`)) %>%
ggplot(aes(x="", fill=`chr12`,y= n)) +
geom_col() +
ggtitle("Chromosome location of hits in trisomy 12") +
coord_polar("y", start=0) +
pp_sra +
theme(axis.title = element_blank(), axis.ticks = element_blank())+
scale_fill_manual(values = c( "grey", "#0571b0"), drop=FALSE)
piechart_hits_prot_chr_tris12 +
annotate(geom = "text", y = tmp$cums-(tmp$n/2), x = 1, label = tmp$label)
####### Input of all Values
tris_pats <- colData(multiomics_MAE) %>% as_tibble() %>% filter(!is.na(trisomy12)) %>% .$patient_ID
longFormat((multiomics_MAE[prot_few_nas , tris_pats ,"proteomics"])) %>%
as_tibble() %>%
dplyr::select("NAME"=rowname, "DESCRIPTION"=assay , primary, value) %>%
unique() %>%
spread(primary, value) #%>%
#write.table(file = "/Users/sophierabe/Desktop/PhD/Labor/Proteomics/CLL/GSEA_CLL_Proteomics/190726_GSEA_data_long_gradient_trisomy12.txt", quote = FALSE, row.names = FALSE, sep = "\t", na="")
tmp_pats <- longFormat((multiomics_MAE[prot_few_nas , tris_pats ,"proteomics"])) %>%
as_tibble() %>%
dplyr::select("NAME"=rowname, primary, value) %>%
unique() %>%
spread(primary, value) %>%
dplyr::select(-1) %>%
colnames() %>%
enframe()
tmp_pats <- tmp_pats %>%
left_join(.,
(proteomics_tbl_meta_biomart %>% dplyr::select(primary, trisomy12) %>% unique) ,
by=c("value"="primary") )
tmp_pats %>%
dplyr::select(trisomy12) #%>% t() %>%
#write.table(file = "/Users/sophierabe/Desktop/PhD/Labor/Proteomics/CLL/GSEA_CLL_Proteomics/190726_GSEA_phenotypelabels_long_gradient_trisomy12.cls", quote = FALSE, row.names = FALSE, col.names = FALSE, sep = " ", na="")
dim(tmp_pats)
sum(tmp_pats$trisomy12)
######### Only input proteins which are not on chromosome 12
prot_no_chr12 <- limma_results %>%
filter(mut=="chrom_abber_trisomy12", chromosome_name!="chr12") %>% .$rowname %>% unique
proteomics_tbl_meta_biomart %>%
filter(!is.na(trisomy12), rowname %in% prot_no_chr12) %>%
dplyr::select("NAME"=rowname,"DESCRIPTION"=assay, primary, value) %>%
unique() %>%
spread(primary, value) #%>%
#write.table(file = "/Users/sophierabe/Desktop/PhD/Labor/Proteomics/CLL/GSEA_CLL_Proteomics/190726_GSEA_data_long_gradient_trisomy12_no_chr12.txt", quote = FALSE, row.names = FALSE, sep = "\t", na="")
Comment: These lists were edited later to fit the GSEA format
limma_results %>% filter(mut=="chrom_abber_del13q14") %>%
mutate("Affected_region" = if_else( (chromosome_name=="13" & start_position>47000000 & start_position<51000000 ), "TRUE",
if_else(hit_annotation == "hit", "FALSE", "not altered"))) %>%
mutate("Affected_region"= factor(Affected_region, levels=c("FALSE","TRUE", "not altered"))) %>%
mutate(logFC= if_else(logFC > log2(5), log2(5.2),
if_else(logFC<(-log2(5)), -log(5.2), logFC) ) ) %>%
ggplot(aes(logFC, -log10(pvalue))) +
geom_vline(aes(xintercept = 0)) +
geom_point(alpha=0.8, aes(colour = Affected_region)) +
geom_text(aes(label = rowname, color=(chromosome_name=="13" & start_position>47000000 & start_position<51000000)),
data = subset(limma_results %>% filter(mut=="chrom_abber_del13q14"), hit_annotation == "hit"),
vjust = 0, nudge_y = 0.1, size = 3, check_overlap = FALSE) +
xlab("log2(fold change)") +
scale_color_manual(values = c("#0571b0", "orange1", "grey"), drop=FALSE) +
pp_sra+
guides(color=FALSE)+
coord_cartesian(xlim=c(-log2(5), log2(5)), ylim=c(0, -log10(min(limma_results$pvalue))))+
ggtitle("del13q14")+
theme( plot.title = element_text(size = 20))
limma_results %>% filter(mut=="chrom_abber_del11q") %>%
mutate("Affected_region" = if_else( (chromosome_name=="11" & start_position>97400000 & start_position<110600000), "TRUE",
if_else(hit_annotation == "hit", "FALSE", "not altered"))) %>%
mutate("Affected_region"= factor(Affected_region, levels=c("FALSE","TRUE", "not altered"))) %>%
mutate(logFC= if_else(logFC > log2(5), log2(5.2),
if_else(logFC<(-log2(5)), -log2(5.2), logFC) ) ) %>%
ggplot(aes(logFC, -log10(pvalue))) +
geom_vline(aes(xintercept = 0)) +
geom_point(alpha=0.8, aes(colour = Affected_region)) +
geom_text(aes(label = rowname, color=(chromosome_name=="11" & start_position>97400000 & start_position<110600000)),
data = subset(limma_results %>% filter(mut=="chrom_abber_del11q"), hit_annotation == "hit"),
vjust = 0, nudge_y = 0.1, size = 3, check_overlap = FALSE) +
xlab("log2(fold change)") +
scale_color_manual(values = c("#0571b0", "orange1", "grey"), drop=FALSE) +
pp_sra+
guides(color=FALSE)+
coord_cartesian(xlim=c(-log2(5), log2(5)), ylim=c(0, -log10(min(limma_results$pvalue)))) +
ggtitle("del11q")+
theme(plot.title = element_text(size = 20))
limma_results %>% filter(mut=="chrom_abber_del17p13") %>%
mutate("Affected_region" = if_else( (chromosome_name=="17" & mean_position<10800000), "TRUE",
if_else(hit_annotation == "hit", "FALSE", "not altered"))) %>%
mutate("Affected_region"= factor(Affected_region, levels=c("FALSE","TRUE", "not altered"))) %>%
mutate(logFC= if_else(logFC > log2(5), log2(5.2),
if_else(logFC<(-log2(5)), -log2(5.2), logFC) ) ) %>%
ggplot(aes(logFC, -log10(pvalue))) +
geom_vline(aes(xintercept = 0)) +
geom_point(alpha=0.8, aes(colour = Affected_region)) +
geom_text(aes(label = rowname, color=(chromosome_name=="17" & mean_position<10800000)),
data = subset(limma_results %>% filter(mut=="chrom_abber_del17p13"), hit_annotation == "hit"),
vjust = 0, nudge_y = 0.1, size = 3, check_overlap = FALSE) +
xlab("log2(fold change)") +
scale_color_manual(values = c("#0571b0", "orange1", "grey"), drop=FALSE) +
pp_sra+
guides(color=FALSE)+
coord_cartesian(xlim=c(-log2(5), log2(5)), ylim=c(0, -log10(min(limma_results$pvalue)))) +
ggtitle("del17p13")+
theme(plot.title = element_text(size = 20))
XPO_volcano_plot <- limma_results %>% filter(mut=="SNPs_XPO1") %>%
mutate("Affected_region" = if_else( rowname=="XPO1", "TRUE",
if_else(hit_annotation == "hit", "FALSE", "not altered"))) %>%
mutate("Affected_region"= factor(Affected_region, levels=c("FALSE","TRUE", "not altered"))) %>%
ggplot(aes(logFC, -log10(pvalue))) +
geom_vline(aes(xintercept = 0)) +
geom_point(alpha=0.8, aes(colour = Affected_region)) +
geom_text(aes(label = rowname),
data = subset(limma_results %>% filter(mut=="SNPs_XPO1", rowname!="XPO1"), hit_annotation == "hit"),
vjust = 0, size = 3, nudge_y = (-1), nudge_x = -0.3,
check_overlap = FALSE, color="#0571b0") +
geom_text(aes(label = rowname, color=rowname=="XPO1"),
data = subset(limma_results %>% filter(mut=="SNPs_XPO1", rowname=="XPO1")),
vjust = 0, size = 3, nudge_x = 0.2, nudge_y = (-0.5),
check_overlap = FALSE) +
xlab("log2(fold change)") +
scale_color_manual(values = c("#0571b0", "orange1", "grey"), drop=FALSE) +
pp_sra+
coord_cartesian(xlim=c(-log2(5), log2(5)) )
XPO_volcano_plot + ggtitle("XPO1")+
theme( plot.title = element_text(size = 20)) +
guides(color=FALSE)
TP53_volcano_plot <- limma_results %>% filter(mut=="SNPs_TP53") %>%
mutate( rowname = if_else(rowname == "TP53", "P53", rowname) ) %>%
mutate("Affected_region" = if_else( rowname=="P53", "TRUE",
if_else(hit_annotation == "hit", "FALSE", "not altered"))) %>%
mutate("Affected_region"= factor(Affected_region, levels=c("FALSE","TRUE", "not altered"))) %>%
ggplot(aes(logFC, -log10(pvalue))) +
geom_vline(aes(xintercept = 0)) +
geom_point(alpha=0.8, aes(colour = Affected_region)) +
geom_text(aes(label = rowname),
data = subset(limma_results %>%
mutate( rowname = if_else(rowname == "TP53", "P53", rowname) ) %>%
filter(mut=="SNPs_TP53", rowname!="P53"), hit_annotation == "hit"),
vjust = 0, size = 3, nudge_y = -0.3, nudge_x = -0.2, check_overlap = FALSE, color="#0571b0") +
geom_text(aes(label = rowname, color=rowname=="P53"),
data = subset(limma_results %>%
mutate( rowname = if_else(rowname == "TP53", "P53", rowname) ) %>%
filter(mut=="SNPs_TP53", rowname=="P53")),
vjust = 0, size = 3, nudge_y = -0.3, nudge_x = -0.2, check_overlap = FALSE) +
xlab("log2(fold change)") +
scale_color_manual(values = c("#0571b0", "orange1", "grey"), drop=FALSE) +
pp_sra+
coord_cartesian(xlim=c(-log2(2.2), log2(2.2)) )
TP53_volcano_plot + ggtitle("TP53")+
theme( plot.title = element_text(size = 20)) +
guides(color=FALSE)
ATM_volcano_plot <- limma_results %>% filter(mut=="SNPs_ATM") %>%
mutate("Affected_region" = if_else( rowname=="ATM", "TRUE",
if_else(hit_annotation == "hit", "FALSE", "not altered"))) %>%
mutate("Affected_region"= factor(Affected_region, levels=c("FALSE","TRUE", "not altered"))) %>%
ggplot(aes(logFC, -log10(pvalue))) +
geom_vline(aes(xintercept = 0)) +
geom_point(alpha=0.8, aes(colour = Affected_region)) +
geom_text(aes(label = rowname),
data = subset(limma_results %>% filter(mut=="SNPs_ATM", rowname!="ATM"), hit_annotation == "hit"),
vjust = 0, size = 3, nudge_y = -0.3, nudge_x = -0.2, check_overlap = FALSE, color="#0571b0") +
geom_text(aes(label = rowname, color=rowname=="ATM"),
data = subset(limma_results %>% filter(mut=="SNPs_ATM", rowname=="ATM")),
vjust = 0, size = 3, nudge_y = -0.3, nudge_x = -0.2, check_overlap = FALSE) +
xlab("log2(fold change)") +
scale_color_manual(values = c("#0571b0", "orange1", "grey"), drop=FALSE) +
pp_sra+
coord_cartesian(xlim=c(-log2(2.2), log2(2.2)) )
ATM_volcano_plot + ggtitle("ATM")+
theme( plot.title = element_text(size = 20)) +
guides(color=FALSE)
selpats <- colData(multiomics_MAE) %>% as_tibble() %>% filter(!is.na(PG)) %>% .$patient_ID
proteomics_tbl_meta_biomart_tmp <- proteomics_tbl_meta_biomart
proteomics_tbl_meta_biomart <- left_join(proteomics_tbl_meta_biomart, enframe(CCP_group5==5, name = "primary", value="PG"), by="primary" )
limma_results_CC5_5_all <- Calculate_Limma(assay_data = assay(multiomics_MAE[prot_few_nas , selpats ,"proteomics"]), mut = "PG")
## ExperimentList contains data.frame or DataFrame,
## potential for errors with mixed data types
## harmonizing input:
## removing 457 sampleMap rows not in names(experiments)
proteomics_tbl_meta_biomart <- proteomics_tbl_meta_biomart_tmp
ggplot(data = limma_results_CC5_5_all) +
geom_histogram(aes(pvalue.limma, alpha = 0.5), bins = 40) +
guides(alpha = FALSE) +
xlab("p-value") +
facet_wrap( ~ mut , scale = "free_y") +
coord_cartesian(xlim = c(0, 1)) +
pp_sra +
ggtitle("PG groups")
limma_results_CC5_5_all <- Annotate_Limma_Results(limma_results_CC5_5_all)
limma_results_CC5_5_all %>%
mutate("Affected_region" = if_else(hit_annotation == "hit", "hit", "not altered")) %>%
mutate("Affected_region"= factor(Affected_region, levels=c("hit", "not altered"))) %>%
ggplot(aes(logFC, -log10(pvalue))) +
geom_vline(aes(xintercept = 0)) +
geom_point(alpha=0.8, aes(colour = Affected_region)) +
geom_text(aes(label = rowname), color="#0571b0",
data = subset(limma_results_CC5_5_all %>% filter( hit_annotation == "hit"),
vjust = 0, nudge_y = 0.1, size = 3, check_overlap = FALSE)) +
facet_wrap( ~ mut, ncol = 1) +
xlab("log2(fold change)") +
scale_color_manual(values = c("#0571b0", "grey"), drop=FALSE) +
pp_sra+
guides(color=guide_legend(title = "PG groups"))+
ggtitle("Consensus cluster plus group 5 against all others")
limma_results_CC5_5_all %>% filter(hit==TRUE) %>% .$rowname
## [1] "VAV2" "AFP" "ZNRF2" "SERPINC1" "BIN2" "TF"
## [7] "CLEC3B" "FBLN1" "ARSA" "NRAS" "PPP3R1" "ITIH2"
## [13] "PTPN6" "A2M" "MAPK15" "TST" "ISYNA1" "TTC9"
## [19] "SLC38A2" "MAPKAPK5" "KYNU" "AKT3" "SLC22A18" "SERPINF2"
gene.data <- limma_results_CC5_5_all %>%
filter( rowname %in% BCR_genes$symbol) %>%
dplyr::select(logFC) %>% unlist
names(gene.data) <- limma_results_CC5_5_all %>%
filter( rowname %in% BCR_genes$symbol) %>%
dplyr::select(rowname) %>% unlist
oldwd <- getwd()
setwd("/Volumes/sd17b003/Sophie/Analysis/CLL_Proteomics/CLL_Proteomics_final/Figures/pathview/")
pathview::pathview(pathway.id = "04662", gene.data = gene.data, species = "hsa", gene.idtype = "SYMBOL",
bins = 10,
limit = list(gene=c( -max(abs(gene.data), na.rm = TRUE) , max(abs(gene.data), na.rm = TRUE))),
kegg.native = TRUE, out.suffix = "PG5")
pathview::pathview(pathway.id = "04662", gene.data = gene.data, species = "hsa", gene.idtype = "SYMBOL",
bins = 10,
limit = list(gene=c( -max(abs(gene.data), na.rm = TRUE) , max(abs(gene.data), na.rm = TRUE))),
kegg.native = FALSE, out.suffix = "PG5")
setwd(oldwd)
####### Input of all Values
longFormat((multiomics_MAE[prot_few_nas , selpats ,"proteomics"])) %>%
as_tibble() %>%
dplyr::select("NAME"=rowname, "DESCRIPTION"=assay , primary, value) %>%
unique() %>%
spread(primary, value) #%>%
#write.table(file = "/Users/sophierabe/Desktop/PhD/Labor/Proteomics/CLL/GSEA_CLL_Proteomics/190927_GSEA_data_long_gradient_CCP_5vsall.txt", quote = FALSE, row.names = FALSE, sep = "\t", na="")
tmp_pats <- longFormat((multiomics_MAE[prot_few_nas , selpats ,"proteomics"])) %>%
as_tibble() %>%
dplyr::select("NAME"=rowname, primary, value) %>%
unique() %>%
spread(primary, value) %>%
dplyr::select(-1) %>%
colnames() %>%
enframe()
tmp_pats <- tmp_pats %>%
left_join(.,
enframe(multiomics_MAE$PG, value="PG", name="primary" ) %>% mutate("PG"=PG==5) ,
by=c("value"="primary") )
tmp_pats %>%
dplyr::select(PG) #%>% t() %>%
#write.table(file = "/Users/sophierabe/Desktop/PhD/Labor/Proteomics/CLL/GSEA_CLL_Proteomics/190927_GSEA_phenotypelabels_long_gradient__CCP_5vsall.cls", quote = FALSE, row.names = FALSE, col.names = FALSE, sep = " ", na="")
dim(tmp_pats)
sum(tmp_pats$PG==TRUE)
save(dif_proteins_P_plot, XPO_volcano_plot, TP53_volcano_plot, ATM_volcano_plot, piechart_hits_prot_chr_tris12,
dif_proteins_P_FDR5_plot, limma_protein_all_volcano_plots,
file = "RData_plots/CLL_Proteomics_Limma_Plots.RData")
save(limma_results,
file = "data/CLL_Proteomics_LimmaProteomics.RData")
sessionInfo()
## R version 4.0.2 (2020-06-22)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.7
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] DEqMS_1.6.0 gplots_3.1.1
## [3] MultiAssayExperiment_1.14.0 apeglm_1.10.0
## [5] limma_3.44.3 fdrtool_1.2.16
## [7] vsn_3.56.0 forcats_0.5.1
## [9] stringr_1.4.0 dplyr_1.0.3
## [11] purrr_0.3.4 readr_1.4.0
## [13] tidyr_1.1.2 tibble_3.0.6
## [15] tidyverse_1.3.0 DESeq2_1.28.1
## [17] SummarizedExperiment_1.18.2 DelayedArray_0.14.1
## [19] matrixStats_0.58.0 Biobase_2.48.0
## [21] GenomicRanges_1.40.0 GenomeInfoDb_1.24.2
## [23] IRanges_2.22.2 S4Vectors_0.26.1
## [25] BiocGenerics_0.34.0 ggrepel_0.9.1
## [27] ggpubr_0.4.0 Hmisc_4.4-2
## [29] ggplot2_3.3.3 Formula_1.2-4
## [31] survival_3.2-7 lattice_0.20-41
## [33] Matrix_1.3-2 pheatmap_1.0.12
## [35] gtools_3.8.2 plyr_1.8.6
##
## loaded via a namespace (and not attached):
## [1] readxl_1.3.1 backports_1.2.1 splines_4.0.2
## [4] BiocParallel_1.22.0 digest_0.6.27 htmltools_0.5.1.1
## [7] magrittr_2.0.1 checkmate_2.0.0 memoise_2.0.0
## [10] cluster_2.1.0 openxlsx_4.2.3 annotate_1.66.0
## [13] modelr_0.1.8 bdsmatrix_1.3-4 jpeg_0.1-8.1
## [16] colorspace_2.0-0 blob_1.2.1 rvest_0.3.6
## [19] haven_2.3.1 xfun_0.20 crayon_1.4.0
## [22] RCurl_1.98-1.2 jsonlite_1.7.2 genefilter_1.70.0
## [25] glue_1.4.2 gtable_0.3.0 zlibbioc_1.34.0
## [28] XVector_0.28.0 car_3.0-10 abind_1.4-5
## [31] scales_1.1.1 mvtnorm_1.1-1 DBI_1.1.1
## [34] rstatix_0.6.0 Rcpp_1.0.6 xtable_1.8-4
## [37] emdbook_1.3.12 htmlTable_2.1.0 foreign_0.8-81
## [40] bit_4.0.4 preprocessCore_1.50.0 htmlwidgets_1.5.3
## [43] httr_1.4.2 RColorBrewer_1.1-2 ellipsis_0.3.1
## [46] farver_2.0.3 pkgconfig_2.0.3 XML_3.99-0.5
## [49] nnet_7.3-15 dbplyr_2.0.0 locfit_1.5-9.4
## [52] labeling_0.4.2 tidyselect_1.1.0 rlang_0.4.10
## [55] AnnotationDbi_1.50.3 munsell_0.5.0 cellranger_1.1.0
## [58] tools_4.0.2 cachem_1.0.1 cli_2.3.0
## [61] generics_0.1.0 RSQLite_2.2.3 broom_0.7.4
## [64] evaluate_0.14 fastmap_1.1.0 yaml_2.2.1
## [67] knitr_1.31 bit64_4.0.5 fs_1.5.0
## [70] zip_2.1.1 caTools_1.18.1 xml2_1.3.2
## [73] compiler_4.0.2 rstudioapi_0.13 curl_4.3
## [76] png_0.1-7 affyio_1.58.0 ggsignif_0.6.0
## [79] reprex_1.0.0 geneplotter_1.66.0 stringi_1.5.3
## [82] highr_0.8 vctrs_0.3.6 pillar_1.4.7
## [85] lifecycle_0.2.0 BiocManager_1.30.10 data.table_1.13.6
## [88] bitops_1.0-6 R6_2.5.0 latticeExtra_0.6-29
## [91] affy_1.66.0 KernSmooth_2.23-18 gridExtra_2.3
## [94] rio_0.5.16 MASS_7.3-53 assertthat_0.2.1
## [97] withr_2.4.1 GenomeInfoDbData_1.2.3 hms_1.0.0
## [100] grid_4.0.2 rpart_4.1-15 coda_0.19-4
## [103] rmarkdown_2.6 carData_3.0-4 bbmle_1.0.23.1
## [106] numDeriv_2016.8-1.1 lubridate_1.7.9.2 base64enc_0.1-3